In [1]:
import pandas as pd
import numpy as np

# Six consecutive calendar days, starting 2016-07-01, serve as the row index.
starting_date = '20160701'
dates_index = pd.date_range(starting_date, periods=6)

# Base frame: the integers 0..23 laid out row-major as 6 rows x 4 columns (A-D).
sample_numpy_data = np.arange(24).reshape(6, 4)
sample_df = pd.DataFrame(
    sample_numpy_data,
    index=dates_index,
    columns=['A', 'B', 'C', 'D'],
)

# Inputs for the extra columns:
#   * a date-indexed Series 1..6 (aligned to the frame on the shared index),
#   * a plain NumPy array 7, 107, ..., one entry per row (assigned by position).
sample_series = pd.Series([1, 2, 3, 4, 5, 6], index=dates_index)
second_numpy_array = np.arange(len(sample_df)) * 100 + 7
fruit_names = ['apple', 'orange', 'banana', 'strawberry', 'blueberry', 'pineapple']

# assign() returns a new frame, so sample_df itself is left untouched.
# The dict form is needed because 'Extra Data' is not a valid keyword name.
sample_df_2 = sample_df.assign(**{
    'Fruits': fruit_names,
    'Extra Data': sample_series * 3 + 1,
    'G': second_numpy_array,
})

sample_df_2


Out[1]:
A B C D Fruits Extra Data G
2016-07-01 0 1 2 3 apple 4 7
2016-07-02 4 5 6 7 orange 7 107
2016-07-03 8 9 10 11 banana 10 207
2016-07-04 12 13 14 15 strawberry 13 307
2016-07-05 16 17 18 19 blueberry 16 407
2016-07-06 20 21 22 23 pineapple 19 507

descriptive statistics


In [ ]:
# Show floats with two decimal places. This is a process-wide pandas display
# option, so it also affects every frame rendered by later cells.
pd.options.display.precision = 2

# Summary statistics for sample_df_2; by default describe() reports only the
# numeric columns of a mixed-dtype frame, so 'Fruits' is left out.
sample_df_2.describe()
column mean

In [ ]:


In [ ]:


In [ ]:


In [ ]:
# Mixed-case strings plus one missing value, used to exercise the .str accessor.
mixed_case_words = ['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']
s = pd.Series(mixed_case_words)

# Lower-case every string element-wise; the NaN entry is passed through as NaN.
s.str.lower()

In [ ]: